{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4d2393b8-4965-46f1-91d4-78c30e573be1",
   "metadata": {},
   "source": [
    "# Chapter 3: Linear Regression for a housing dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a35515d8-1f6f-45e3-afe3-765f8ef9ec6b",
   "metadata": {},
   "source": [
    "### Importing the necessary packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "39b72d38-411a-4e8a-ac59-4fafa4a4584d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import inspect\n",
    "\n",
    "from matplotlib import pyplot as plt\n",
    "import numpy\n",
    "import tqdm # progress bar"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5fb399a2-8d72-4d89-a4a5-6f659c86d15e",
   "metadata": {},
   "source": [
    "### Plotting functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "29370b56-fb2d-44b5-87f4-0e1f4193da67",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We don't call plt.show() in these helpers because there are times when we combine their plots.\n",
    "\n",
    "def plot_scatter(x_iterable, y_iterable, x_label = \"\", y_label = \"\"):\n",
    "    \"\"\"\n",
    "    Scatter-plot y_iterable against x_iterable through the pyplot interface.\n",
    "\n",
    "    Both iterables are converted to numpy arrays before plotting, and the\n",
    "    axis labels default to empty strings. plt.show() is not called here;\n",
    "    the caller decides when the figure is displayed.\n",
    "    \"\"\"\n",
    "    points_x, points_y = numpy.array(x_iterable), numpy.array(y_iterable)\n",
    "    plt.scatter(points_x, points_y)\n",
    "    # Label the axes once the points have been drawn.\n",
    "    plt.xlabel(x_label)\n",
    "    plt.ylabel(y_label)\n",
    "\n",
    "def draw_line(slope, y_intercept, color='grey', linewidth=0.7, starting=0, ending=8):\n",
    "    \"\"\"\n",
    "    Plot the line y = y_intercept + slope*x for x between starting and ending.\n",
    "\n",
    "    The line is evaluated at 1000 evenly spaced x values and drawn as a\n",
    "    solid line with the given color and linewidth. plt.show() is not called.\n",
    "    \"\"\"\n",
    "    sample_points = numpy.linspace(starting, ending, 1000)\n",
    "    line_heights = y_intercept + slope*sample_points\n",
    "    plt.plot(\n",
    "        sample_points,\n",
    "        line_heights,\n",
    "        linestyle='-',\n",
    "        color=color,\n",
    "        linewidth=linewidth)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5db8c9fd-9e3c-413d-9445-42ce409c69f4",
   "metadata": {},
   "source": [
    "### Coding the tricks\n",
    "\n",
    "- Simple trick\n",
    "- Absolute trick\n",
    "- Square trick"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8d37ba4f-6f1a-4f8c-822d-c40cc699e953",
   "metadata": {},
   "outputs": [],
   "source": [
    "def simple_trick(bias, slope, predictor, current_value):\n",
    "    \"\"\"\n",
    "    y ~ bias + slope*predictor\n",
    "\n",
    "    The simple trick performs small random perturbations that move the\n",
    "    line towards the point (predictor, current_value).\n",
    "\n",
    "    Parameters:\n",
    "        bias: current y-intercept of the line\n",
    "        slope: current slope of the line\n",
    "        predictor: x value of the sampled point\n",
    "        current_value: observed y value of the sampled point\n",
    "\n",
    "    Returns:\n",
    "        (slope, bias) -- note the order is swapped relative to the\n",
    "        parameters. The inputs are returned unchanged when predictor == 0\n",
    "        or when the point lies exactly on the line.\n",
    "    \"\"\"\n",
    "    # Both steps are drawn up front, in this order, so every call consumes\n",
    "    # the same amount of random numbers whichever branch is taken.\n",
    "    small_random_1 = numpy.random.random()*0.1\n",
    "    small_random_2 = numpy.random.random()*0.1\n",
    "\n",
    "    if predictor == 0:\n",
    "        return slope, bias\n",
    "\n",
    "    predicted_value = bias + slope*predictor\n",
    "    if current_value > predicted_value:\n",
    "        # Point above the line: translate the line up and rotate it so the\n",
    "        # prediction at this predictor increases.\n",
    "        bias += small_random_2\n",
    "        if predictor > 0:\n",
    "            slope += small_random_1\n",
    "        elif predictor < 0:\n",
    "            slope -= small_random_1\n",
    "    elif current_value < predicted_value:\n",
    "        # Point below the line: the mirror image of the branch above.\n",
    "        # Translate down and rotate so the prediction decreases (for a\n",
    "        # negative predictor that means a larger slope).\n",
    "        bias -= small_random_2\n",
    "        if predictor > 0:\n",
    "            slope -= small_random_1\n",
    "        elif predictor < 0:\n",
    "            slope += small_random_1\n",
    "\n",
    "    return slope, bias\n",
    "\n",
    "def absolute_trick(bias, slope, predictor, current_value, learning_rate):\n",
    "    \"\"\"\n",
    "    y ~ bias + slope*predictor\n",
    "\n",
    "    Moves the line by a learning_rate-sized step: the slope changes by\n",
    "    learning_rate*predictor and the bias by learning_rate. The step is\n",
    "    added when current_value is strictly above the prediction and\n",
    "    subtracted otherwise (including when the point lies on the line).\n",
    "\n",
    "    Returns (slope, bias), in that order.\n",
    "    \"\"\"\n",
    "    point_is_above = current_value > (bias + slope*predictor)\n",
    "    # Fold the direction of the move into the sign of the rate.\n",
    "    signed_rate = learning_rate if point_is_above else -learning_rate\n",
    "    slope += signed_rate*predictor\n",
    "    bias += signed_rate\n",
    "    return slope, bias\n",
    "\n",
    "def square_trick(bias, slope, predictor, current_value, learning_rate):\n",
    "    \"\"\"\n",
    "    y ~ bias + slope*predictor\n",
    "\n",
    "    Moves the line by a step proportional to the signed error\n",
    "    (current_value minus the prediction): the slope changes by\n",
    "    learning_rate*predictor*error and the bias by learning_rate*error.\n",
    "\n",
    "    Returns (slope, bias), in that order.\n",
    "    \"\"\"\n",
    "    error = current_value - (bias + slope*predictor)\n",
    "    slope += learning_rate*predictor*error\n",
    "    bias += learning_rate*error\n",
    "    return slope, bias"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "39215afc-ff96-4a60-8647-31f134411e5b",
   "metadata": {},
   "source": [
    "### Constructing the linear regression algorithm\n",
    "\n",
    "The linear regression algorithm consists of:\n",
    "- Starting with random weights\n",
    "- Iterating the square (or simple, or absolute) trick many times."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7cac9f6d-5ccd-48ff-882e-2e14802a8664",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We want to be able to use different tricks, so we pass the function as a parameter\n",
    "\n",
    "def perform_one_epoch(bias, slope, predictor, current_value, trick_function, learning_rate=None):\n",
    "    \"\"\"\n",
    "    Apply trick_function once to the point (predictor, current_value).\n",
    "\n",
    "    learning_rate is forwarded (as a keyword argument) only when\n",
    "    trick_function can accept it, i.e. when its signature has a\n",
    "    learning_rate parameter or a **kwargs catch-all. Tricks with and\n",
    "    without a learning rate can therefore be passed interchangeably,\n",
    "    whatever learning_rate value the caller supplies.\n",
    "\n",
    "    If the signature cannot be inspected, learning_rate is forwarded only\n",
    "    when it is truthy.\n",
    "\n",
    "    Parameters:\n",
    "        bias, slope: current parameters of the line\n",
    "        predictor, current_value: the sampled point\n",
    "        trick_function: callable(bias, slope, predictor, current_value\n",
    "            [, learning_rate]) returning (slope, bias)\n",
    "        learning_rate: step size handed to tricks that accept one\n",
    "\n",
    "    Returns:\n",
    "        (slope, bias) as produced by trick_function.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        parameters = inspect.signature(trick_function).parameters.values()\n",
    "    except (TypeError, ValueError):\n",
    "        # Not every callable exposes a signature; fall back to forwarding\n",
    "        # the learning rate only when one was actually given.\n",
    "        accepts_learning_rate = bool(learning_rate)\n",
    "    else:\n",
    "        accepts_learning_rate = any(\n",
    "            parameter.name == 'learning_rate'\n",
    "            or parameter.kind is inspect.Parameter.VAR_KEYWORD\n",
    "            for parameter in parameters)\n",
    "\n",
    "    if accepts_learning_rate:\n",
    "        slope, bias = trick_function(\n",
    "            bias, slope, predictor, current_value, learning_rate=learning_rate)\n",
    "    else:\n",
    "        slope, bias = trick_function(\n",
    "            bias, slope, predictor, current_value)\n",
    "    return slope, bias\n",
    "\n",
    "def simple_linear_regression(\n",
    "        features,\n",
    "        labels,\n",
    "        trick_function = absolute_trick,\n",
    "        learning_rate=0.01,\n",
    "        epochs = 1000,\n",
    "        plot_all_epochs = True):\n",
    "    \"\"\"\n",
    "    Fit y ~ bias + slope*x by applying trick_function to one randomly\n",
    "    chosen sample per epoch, then plot the fitted line over the data.\n",
    "\n",
    "    trick_function must follow y ~ b0 + b1x with parameters:\n",
    "        bias,\n",
    "        slope,\n",
    "        predictor,\n",
    "        current_value,\n",
    "        learning_rate (optional for simple_trick)\n",
    "    and return (slope, bias).\n",
    "\n",
    "    Parameters:\n",
    "        features: indexable sequence of x values\n",
    "        labels: indexable sequence of y values, aligned with features\n",
    "        trick_function: update rule applied once per epoch\n",
    "        learning_rate: step size handed to perform_one_epoch\n",
    "        epochs: number of single-sample updates to perform\n",
    "        plot_all_epochs: when True, the line is drawn in grey before\n",
    "            every update\n",
    "\n",
    "    Returns:\n",
    "        (slope, bias) after the last epoch. Both start from independent\n",
    "        numpy.random.random() draws.\n",
    "    \"\"\"\n",
    "\n",
    "    slope = numpy.random.random()\n",
    "    bias = numpy.random.random()\n",
    "\n",
    "    # The plotted x-range does not change between epochs, so compute it once.\n",
    "    line_start = min(features) - 1\n",
    "    line_end = max(features) + 1\n",
    "\n",
    "    for _ in tqdm.tqdm(range(epochs)):\n",
    "        if plot_all_epochs:\n",
    "            draw_line(slope, bias, starting=line_start, ending=line_end)\n",
    "        # numpy's randint excludes the upper bound, so len(features) is what\n",
    "        # makes every sample (including the last one) eligible.\n",
    "        index_random = numpy.random.randint(0, len(features))\n",
    "        predictor = features[index_random]\n",
    "        current_value = labels[index_random]\n",
    "        slope, bias = perform_one_epoch(\n",
    "            bias, slope, predictor, current_value, trick_function, learning_rate)\n",
    "    # Draw the final line over the same x-range as the per-epoch lines.\n",
    "    draw_line(slope, bias, 'black', starting=line_start, ending=line_end)\n",
    "    plot_scatter(features, labels)\n",
    "    plt.show() #plots all epochs, line, and scatter\n",
    "\n",
    "    return slope, bias"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c82a21e8-e7c7-479e-b002-fe325b0cf425",
   "metadata": {},
   "source": [
    "### Adding error metrics to our model\n",
    "\n",
    "Now our linear regression algorithm consists of:\n",
    "- Starting with random weights\n",
    "- Iterating the square (or simple, or absolute) trick many times.\n",
    "- Plotting the error function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bb80e385-5f00-4973-8114-335389fa494d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rmse(labels, predictions):\n",
    "    \"\"\"\n",
    "    The root mean square error function.\n",
    "\n",
    "    Subtracts predictions from labels, sums the squared differences with a\n",
    "    dot product, divides by len(labels) and returns the square root.\n",
    "    \"\"\"\n",
    "    sample_count = len(labels)\n",
    "    residuals = numpy.subtract(labels, predictions)\n",
    "    squared_sum = numpy.dot(residuals, residuals)\n",
    "    mean_square = 1.0/sample_count * squared_sum\n",
    "    return numpy.sqrt(mean_square)\n",
    "\n",
    "def linear_regression(\n",
    "        features,\n",
    "        labels,\n",
    "        trick_function = absolute_trick,\n",
    "        learning_rate=0.01,\n",
    "        error_metric = rmse, # error function as parameter\n",
    "        epochs = 1000,\n",
    "        plot_all_epochs = True):\n",
    "    \"\"\"\n",
    "    Fit y ~ bias + slope*x by applying trick_function to one randomly\n",
    "    chosen sample per epoch, logging the error before every update, then\n",
    "    plot the fitted line over the data and the error per epoch.\n",
    "\n",
    "    trick_function must follow y ~ b0 + b1x with parameters:\n",
    "        bias,\n",
    "        slope,\n",
    "        predictor,\n",
    "        current_value,\n",
    "        learning_rate (optional for simple_trick)\n",
    "    and return (slope, bias).\n",
    "    error_metric must take two arrays (labels, predictions) and return a\n",
    "    scalar.\n",
    "\n",
    "    Parameters:\n",
    "        features: indexable sequence of x values\n",
    "        labels: indexable sequence of y values, aligned with features\n",
    "        trick_function: update rule applied once per epoch\n",
    "        learning_rate: step size handed to perform_one_epoch\n",
    "        error_metric: scoring function evaluated on the whole dataset\n",
    "        epochs: number of single-sample updates to perform\n",
    "        plot_all_epochs: when True, the line is drawn in grey before\n",
    "            every update\n",
    "\n",
    "    Returns:\n",
    "        (slope, bias) after the last epoch. Both start from independent\n",
    "        numpy.random.random() draws.\n",
    "    \"\"\"\n",
    "    slope = numpy.random.random()\n",
    "    bias = numpy.random.random()\n",
    "    errors = [] # logging errors\n",
    "\n",
    "    # Array view of the features so one expression predicts every sample.\n",
    "    feature_array = numpy.asarray(features)\n",
    "    # The plotted x-range does not change between epochs, so compute it once.\n",
    "    line_start = min(features) - 1\n",
    "    line_end = max(features) + 1\n",
    "\n",
    "    for _ in tqdm.tqdm(range(epochs)):\n",
    "        if plot_all_epochs:\n",
    "            draw_line(slope, bias, starting=line_start, ending=line_end)\n",
    "\n",
    "        # Scoring predictions and storing error: one prediction per sample,\n",
    "        # so the metric compares every label with its own prediction.\n",
    "        predictions = feature_array*slope + bias\n",
    "        errors.append(error_metric(labels, predictions))\n",
    "\n",
    "        # numpy's randint excludes the upper bound, so len(features) is what\n",
    "        # makes every sample (including the last one) eligible.\n",
    "        index_random = numpy.random.randint(0, len(features))\n",
    "        predictor = features[index_random]\n",
    "        current_value = labels[index_random]\n",
    "        slope, bias = perform_one_epoch(\n",
    "            bias, slope, predictor, current_value, trick_function, learning_rate)\n",
    "    # Draw the final line over the same x-range as the per-epoch lines.\n",
    "    draw_line(slope, bias, 'black', starting=line_start, ending=line_end)\n",
    "    plot_scatter(features, labels)\n",
    "    plt.show()\n",
    "    # plotting error\n",
    "    plot_scatter(range(len(errors)), errors, \"Number of Epochs\", \"Numerical Error\")\n",
    "    plt.show()\n",
    "\n",
    "    return slope, bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f127075-9f80-4dc0-96a4-1ab6524a35dd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
